package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object Demo1SparkSession {

  def main(args: Array[String]): Unit = {

    /**
      * SparkSession: the unified entry point since Spark 2.0; it replaces
      * both SparkContext and SQLContext.
      */
    val spark: SparkSession = SparkSession
      .builder()
      .appName("session")
      .master("local")
      .config("spark.sql.shuffle.partitions", 1) //number of partitions a DataFrame has after a Spark SQL shuffle; defaults to 200 when running on a cluster
      .getOrCreate()
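
    //the session also wraps the older entry points: the underlying
    //SparkContext stays reachable for low-level RDD work, e.g.
    //val sc = spark.sparkContext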

    //import Spark's implicit conversions (enables the $"col" syntax used below)
    import spark.implicits._

    //read JSON-formatted data
    val studentDF: DataFrame = spark.read.json("data/students.json")
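
    //a sketch: supplying an explicit schema (columns assumed from usage below:
    //name, age, clazz) skips the inference pass that plain .json() performs;
    //the DDL-string form of schema() requires Spark 2.3+
    //spark.read
    //  .schema("name STRING, age BIGINT, clazz STRING")
    //  .json("data/students.json")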

    //view the data
    //studentDF.show()
    //print the schema
    //studentDF.printSchema()

    //select columns by name
    studentDF.select("name", "age")//.show()

    //$"col" builds a Column object, so columns can be used in expressions;
    //as assigns an alias to the result
    studentDF.select($"name", $"age" + 1 as "age")//.show()
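
    //the same projection written as SQL expression strings via selectExpr
    studentDF.selectExpr("name", "age + 1 as age")//.show()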

    //filter rows
    studentDF.where($"age" > 23)//.show()
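
    //where also accepts a SQL predicate string
    studentDF.where("age > 23")//.show()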

    //group and count
    studentDF.groupBy($"clazz").count()//.show()
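
    //a sketch of richer aggregations via agg(); count and avg come from
    //org.apache.spark.sql.functions (assumes age is numeric, as used above)
    import org.apache.spark.sql.functions.{avg, count}
    studentDF.groupBy($"clazz")
      .agg(count("*") as "num", avg($"age") as "avgAge")//.show()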

    //register the DataFrame as a temporary view so it can be queried with SQL
    studentDF.createOrReplaceTempView("student")

    val clazzNumDF: DataFrame = spark.sql("select clazz,count(1) as clazzNum from student group by clazz")

    //clazzNumDF.show()

    /**
      * Logical execution order of a SQL query:
      * from --> on --> join --> where --> group by --> having --> select --> order by --> limit
      */
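
    //a sketch exercising the full clause order on the same view
    //(the literal thresholds are illustrative values)
    spark.sql(
      """
        |select clazz, count(1) as clazzNum
        |from student
        |where age > 21
        |group by clazz
        |having count(1) > 1
        |order by clazzNum desc
        |limit 10
      """.stripMargin)//.show()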

    //save the result as tab-separated CSV; Spark writes a directory of
    //part files at data/json, replacing any previous output

    clazzNumDF.write
      .mode(SaveMode.Overwrite)
      .option("sep", "\t")
      .csv("data/json")
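
    //a quick sanity check (sketch): read the saved output back with the same separator
    //spark.read
    //  .option("sep", "\t")
    //  .csv("data/json")
    //  .show()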

  }

}
